library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5     ✓ purrr   0.3.4
## ✓ tibble  3.1.6     ✓ dplyr   1.0.7
## ✓ tidyr   1.1.4     ✓ stringr 1.4.0
## ✓ readr   2.1.0     ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(readxl)
library(httr)
library(lubridate)
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:httr':
## 
##     config
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
# Default figure sizing for every knitr chunk in this document.
knitr::opts_chunk$set(
  fig.width = 6,
  fig.asp = 0.6,
  out.width = "90%"
)

# Minimal theme with the legend underneath the panel for all ggplot2 figures.
theme_set(theme_minimal() + theme(legend.position = "bottom"))

# The continuous colour/fill options accept "gradient", "viridis", or a
# function returning a scale; a palette name such as "Reds" is rejected as an
# unknown scale type. Wrap the brewer "Reds" palette in a function instead.
options(
  ggplot2.continuous.colour = function(...) {
    scale_colour_distiller(palette = "Reds", ...)
  },
  ggplot2.continuous.fill = function(...) {
    scale_fill_distiller(palette = "Reds", ...)
  }
)

# ggplot2 looks up its default discrete scales by name as functions, so the
# overrides must be functions (not already-built scale objects), and the fill
# default must be built from the fill variant of the brewer scale.
scale_colour_discrete <- function(...) {
  scale_colour_brewer(palette = "Reds", ...)
}
scale_fill_discrete <- function(...) {
  scale_fill_brewer(palette = "Reds", ...)
}

Import the dataset and tidy the data

# Read the subway-crime complaints, normalise the column names, give the
# date/time/category columns readable names, and keep only complaints whose
# start date is strictly after 2021-01-01.
raw_sub_crime <-
  read_csv("./data/subwaycrime.csv") %>%
  janitor::clean_names() %>%
  rename(
    start_date = cmplnt_fr_dt,
    start_time = cmplnt_fr_tm,
    end_date = cmplnt_to_dt,
    end_time = cmplnt_to_tm,
    law_cat = law_cat_cd,
    crime_event = ofns_desc
  ) %>%
  # Parse the "mm/dd/yyyy" text and store it back as "yyyy-mm-dd" strings.
  mutate(
    across(
      c(start_date, end_date),
      ~ as.character(as.Date(.x, format = "%m/%d/%Y"))
    )
  ) %>%
  # ISO-formatted date strings sort chronologically, so comparing them as
  # text is equivalent to comparing the dates themselves.
  filter(start_date > "2021-01-01")
## New names:
## * `` -> ...1
## Warning: One or more parsing issues, see `problems()` for details
## Rows: 6244 Columns: 37
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (20): BORO_NM, CMPLNT_FR_DT, CMPLNT_TO_DT, CRM_ATPT_CPTD_CD, JURIS_DESC...
## dbl  (11): ...1, CMPLNT_NUM, ADDR_PCT_CD, JURISDICTION_CODE, KY_CD, PD_CD, T...
## lgl   (4): HADEVELOPT, HOUSING_PSA, LOC_OF_OCCUR_DESC, PARKS_NM
## time  (2): CMPLNT_FR_TM, CMPLNT_TO_TM
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Crime events vs. month

Overall, the number of crime events in each month

# Keep only the timing and offence columns, and shorten each start date to its
# first seven characters (the "yyyy-mm" part of the "yyyy-mm-dd" string) so
# that complaints can be grouped by month.
sub_crime_freq <-
  raw_sub_crime %>%
  select(start_date, start_time, crime_event, law_cat) %>%
  mutate(start_date = substr(start_date, start = 1, stop = 7))
  
# Bar chart of the number of crime events recorded in each month.
plot_1 <-
  sub_crime_freq %>%
  # One row per month, with the number of events stored in `event_num`.
  count(start_date, name = "event_num") %>%
  plot_ly(
    x = ~start_date,
    y = ~event_num,
    type = "bar"
  )

# Add the title and axis labels, then display the figure. The namespace is
# spelled out because graphics::layout() is masked by plotly::layout().
plot_1 %>%
  plotly::layout(
    title = "Crime events over month",
    xaxis = list(title = "Month"),
    yaxis = list(title = "Number of Crime Events")
  )